{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import libraries\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import pymysql\n",
    "from sklearn import (metrics, linear_model)\n",
    "import xgboost as xgb\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Connect to PIC\n",
    "con = pymysql.connect(host='localhost',\n",
    "                             user='root',\n",
    "                             password='13163232289zx',\n",
    "                             db='pmimic',\n",
    "                             cursorclass=pymysql.cursors.DictCursor)\n",
    "cur=con.cursor()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   subject_id  hadm_id  icustay_id      hosp_admittime      hosp_dischtime  \\\n",
      "0          26   100000      200532 2098-11-09 18:30:55 2098-11-14 09:18:00   \n",
      "1          29   100002      200534 2062-11-29 20:52:52 2062-12-07 14:27:00   \n",
      "2          32   100005      200537 2105-09-17 09:59:36 2105-10-01 10:17:00   \n",
      "3          23   100006      200538 2108-09-29 18:35:01 2108-10-10 11:24:00   \n",
      "4          35   100007      200539 2091-11-04 12:40:40 2091-11-15 17:02:00   \n",
      "\n",
      "   hospital_expire_flag first_careunit                 dob  \\\n",
      "0                     0    General ICU 2098-06-22 00:00:00   \n",
      "1                     0    General ICU 2060-01-22 00:00:00   \n",
      "2                     0    General ICU 2105-07-22 00:00:00   \n",
      "3                     0    General ICU 2108-04-15 05:21:00   \n",
      "4                     0    General ICU 2080-01-12 00:00:00   \n",
      "\n",
      "           icu_intime         icu_outtime  icu_los gender  age_hosp_in  \n",
      "0 2098-11-09 18:30:55 2098-11-14 09:18:00   4.6160      F       0.3857  \n",
      "1 2062-11-29 20:52:52 2062-12-07 14:27:00   7.7320      F       2.8572  \n",
      "2 2105-09-17 09:59:36 2105-10-01 10:17:00  14.0121      M       0.1573  \n",
      "3 2108-09-29 18:35:00 2108-10-10 11:24:00  10.7007      M       0.4590  \n",
      "4 2091-11-04 12:40:39 2091-11-15 17:02:00  11.1815      M      11.8206  \n"
     ]
    }
   ],
   "source": [
    "#patient characteristics\n",
    "query = \\\n",
    "\"\"\"\n",
    "WITH patientcharac as (\n",
    "SELECT a.subject_id, a.hadm_id, i.icustay_id, \n",
    "    a.admittime as hosp_admittime, a.dischtime as hosp_dischtime, \n",
    "    a.hospital_expire_flag,i.first_careunit, \n",
    "    p.dob,i.intime as icu_intime, i.outtime as icu_outtime, \n",
    "    round(timestampdiff(SECOND,i.intime,i.outtime)/60/60/24,4) as icu_los, \n",
    "    p.gender, \n",
    "    round(timestampdiff(SECOND,p.dob,a.admittime)/60/60/24/365,4) as age_hosp_in\n",
    "FROM admissions a\n",
    "INNER JOIN icustays i\n",
    "ON a.hadm_id = i.hadm_id\n",
    "INNER JOIN patients p\n",
    "ON a.subject_id = p.subject_id\n",
    ")\n",
    "SELECT * from patientcharac\n",
    "where hospital_expire_flag=0;\n",
    "\"\"\"\n",
    "\n",
    "patient_characteristics = pd.read_sql_query(query,con)\n",
    "print(patient_characteristics.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "#delete patients who had multiple ICU stay during a hospital stay\n",
    "query1 = \\\n",
    "\"\"\"\n",
    "WITH multiplestay AS (\n",
    "SELECT hadm_id, count(hadm_id) as obs\n",
    "FROM icustays\n",
    "GROUP BY hadm_id)\n",
    "SELECT *\n",
    "FROM multiplestay\n",
    "where obs>1;\n",
    "\"\"\"\n",
    "multiplestay = pd.read_sql_query(query1,con)\n",
    "patient_characteristics=patient_characteristics[~patient_characteristics.hadm_id.isin(multiplestay.hadm_id)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "#vital_signs\n",
    "query1 = \\\n",
    "\"\"\"\n",
    "SELECT *\n",
    "FROM chartevents;\n",
    "\"\"\"\n",
    "\n",
    "vital_sign = pd.read_sql_query(query1,con)\n",
    "vital_sign=vital_sign[vital_sign.ITEMID.isin(['1001','1002','1003','1015','1016'])]\n",
    "patient_characteristics=patient_characteristics[patient_characteristics.hadm_id.isin(vital_sign.HADM_ID)]\n",
    "def dataprocess(x):\n",
    "    data=vital_sign[vital_sign.HADM_ID==x]\n",
    "    temperature_mean=data[data.ITEMID=='1001'].VALUENUM.mean()\n",
    "    temperature_std=data[data.ITEMID=='1001'].VALUENUM.std()\n",
    "    pulse_mean=data[data.ITEMID=='1002'].VALUENUM.mean()\n",
    "    pulse_std=data[data.ITEMID=='1002'].VALUENUM.std()\n",
    "    heartrate_mean=data[data.ITEMID=='1003'].VALUENUM.mean()\n",
    "    heartrate_std=data[data.ITEMID=='1003'].VALUENUM.std()\n",
    "    SBP_mean=data[data.ITEMID=='1016'].VALUENUM.mean()\n",
    "    SBP_std=data[data.ITEMID=='1016'].VALUENUM.std()\n",
    "    DBP_mean=data[data.ITEMID=='1015'].VALUENUM.mean()\n",
    "    DBP_std=data[data.ITEMID=='1015'].VALUENUM.std()\n",
    "    return temperature_mean,temperature_std,pulse_mean,pulse_std,heartrate_mean,heartrate_std,SBP_mean,SBP_std,DBP_mean,DBP_std\n",
    "patient_characteristics['vital_sign']=patient_characteristics['hadm_id'].apply(dataprocess)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "patient_characteristics['temperature_mean']=patient_characteristics['vital_sign'].astype(str).str.split(',').str[0]\n",
    "patient_characteristics['temperature_std']=patient_characteristics['vital_sign'].astype(str).str.split(',').str[1]\n",
    "patient_characteristics['pulse_mean']=patient_characteristics['vital_sign'].astype(str).str.split(',').str[2]\n",
    "patient_characteristics['pulse_std']=patient_characteristics['vital_sign'].astype(str).str.split(',').str[3]\n",
    "patient_characteristics['heartrate_mean']=patient_characteristics['vital_sign'].astype(str).str.split(',').str[4]\n",
    "patient_characteristics['heartrate_std']=patient_characteristics['vital_sign'].astype(str).str.split(',').str[5]\n",
    "patient_characteristics['SBP_mean']=patient_characteristics['vital_sign'].astype(str).str.split(',').str[6]\n",
    "patient_characteristics['SBP_std']=patient_characteristics['vital_sign'].astype(str).str.split(',').str[7]\n",
    "patient_characteristics['DBP_mean']=patient_characteristics['vital_sign'].astype(str).str.split(',').str[8]\n",
    "patient_characteristics['DBP_std']=patient_characteristics['vital_sign'].astype(str).str.split(',').str[9]\n",
    "patient_characteristics['temperature_mean']=patient_characteristics['temperature_mean'].apply(lambda x:x[1:])\n",
    "patient_characteristics['DBP_std']=patient_characteristics['DBP_std'].apply(lambda x:x[:len(x)-1])\n",
    "def processstring(x):\n",
    "    if x==' nan':\n",
    "        x=np.nan\n",
    "    else:\n",
    "        x=round(float(x),2)\n",
    "    return x\n",
    "for c in ['temperature_mean','temperature_std','pulse_mean','pulse_std','heartrate_mean'\n",
    "          ,'heartrate_std','SBP_mean','SBP_std','DBP_mean','DBP_std']:\n",
    "    patient_characteristics[c]=patient_characteristics[c].apply(processstring)\n",
    "def processgender(x):\n",
    "    if x=='F':\n",
    "        x='1'\n",
    "    else:\n",
    "        x='0'\n",
    "    x=int(x)\n",
    "    return x\n",
    "patient_characteristics['gender']=patient_characteristics['gender'].apply(processgender)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Mean_absolute_error: 6.376199898192746\n",
      "\n",
      "Explained_variance_score: 0.39886204568062733\n"
     ]
    }
   ],
   "source": [
    "#xgboost prediction model\n",
    "X=patient_characteristics[['gender','age_hosp_in','temperature_mean','temperature_std','pulse_mean','pulse_std','heartrate_mean'\n",
    "          ,'heartrate_std','SBP_mean','SBP_std','DBP_mean','DBP_std']]\n",
    "y=patient_characteristics['icu_los']\n",
    "a_train, a_test, b_train, b_test = train_test_split(X, y, test_size=0.3, random_state=42)\n",
    "clf_xgb=xgb.XGBRegressor(booster='gbtree',objective=\"reg:linear\")\n",
    "clf_xgb.fit(a_train, b_train)\n",
    "prediction_xgb=clf_xgb.predict(a_test)\n",
    "print('\\nMean_absolute_error: {}'\\\n",
    "    .format( metrics.mean_absolute_error(b_test, prediction_xgb)))\n",
    "print('\\nExplained_variance_score: {}'\\\n",
    "    .format( metrics.explained_variance_score(b_test, prediction_xgb)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
